1 package edu.jiangxin.apktoolbox.pdf;
2
3 import com.itextpdf.kernel.pdf.*;
4 import org.apache.logging.log4j.LogManager;
5 import org.apache.logging.log4j.Logger;
6 import org.apache.pdfbox.Loader;
7 import org.apache.pdfbox.pdmodel.PDDocument;
8 import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
9 import org.apache.pdfbox.pdmodel.PDPage;
10 import org.apache.pdfbox.pdmodel.PDPageTree;
11 import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
12 import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
13 import org.apache.pdfbox.text.PDFTextStripper;
14
15 import java.io.File;
16 import java.io.IOException;
17
18 public class PdfUtils {
19 private static final Logger LOGGER = LogManager.getLogger(PdfUtils.class.getSimpleName());
20 public static boolean isScannedPdf(File file, int threshold) {
21 int length = 0;
22
23 try (PDDocument document = Loader.loadPDF(file)) {
24 boolean isEncrypted = document.isEncrypted();
25 if (isEncrypted) {
26 document.setAllSecurityToBeRemoved(true);
27 }
28
29 PDFTextStripper stripper = new PDFTextStripper();
30 String text = stripper.getText(document).trim();
31 length = text.length();
32 } catch (IOException e) {
33 LOGGER.error("Error reading PDF file: {}", e.getMessage());
34 return false;
35 }
36 LOGGER.info("Processing file: {}, text size: {}", file.getPath(), length);
37 return length < threshold;
38 }
39
40 public static boolean isEncryptedPdf(File file) {
41 boolean isEncrypted;
42
43 try (PDDocument document = Loader.loadPDF(file)) {
44 isEncrypted = document.isEncrypted();
45 } catch (IOException e) {
46 LOGGER.error("Error reading PDF file: {}", e.getMessage());
47 return false;
48 }
49 LOGGER.info("Processing file: {}, is encrypted: {}", file.getPath(), isEncrypted);
50 return isEncrypted;
51 }
52
53 public static boolean isNonOutlinePdf(File file) {
54 boolean hasOutline = false;
55
56 try (PDDocument document = Loader.loadPDF(file)) {
57 boolean isEncrypted = document.isEncrypted();
58 if (isEncrypted) {
59 document.setAllSecurityToBeRemoved(true);
60 }
61
62 if (document.getDocumentCatalog() != null && document.getDocumentCatalog().getDocumentOutline() != null) {
63 hasOutline = true;
64 }
65 } catch (IOException e) {
66 LOGGER.error("Error reading PDF file: {}", e.getMessage());
67 return false;
68 }
69 LOGGER.info("Processing file: {}, has outline: {}", file.getPath(), hasOutline);
70 return !hasOutline;
71 }
72
73 public static boolean hasAnnotations(File file) {
74 boolean hasAnnotations = false;
75
76 try (PDDocument document = Loader.loadPDF(file)) {
77 boolean isEncrypted = document.isEncrypted();
78 if (isEncrypted) {
79 document.setAllSecurityToBeRemoved(true);
80 }
81 PDDocumentCatalog catalog = document.getDocumentCatalog();
82 if (catalog == null) {
83 return false;
84 }
85 PDPageTree pages = document.getDocumentCatalog().getPages();
86 if (pages == null || pages.getCount() == 0) {
87 return false;
88 }
89
90 for (PDPage page : pages) {
91 if (page.getAnnotations() != null && !page.getAnnotations().isEmpty()) {
92 int pageNumber = page.getCOSObject().getInt("PageNumber", 0);
93 String subType = page.getAnnotations().get(0).getSubtype();
94 LOGGER.info("Found annotations on page: {}, subType: {}", pageNumber, subType);
95 if (!subType.equals("Link")) {
96 hasAnnotations = true;
97 break;
98 }
99 }
100 }
101 } catch (IOException e) {
102 LOGGER.error("Error reading PDF file: {}", e.getMessage());
103 return hasAnnotations;
104 }
105 LOGGER.info("Processing file: {}, has annotations: {}", file.getPath(), hasAnnotations);
106 return hasAnnotations;
107 }
108
109 public static void removePassword(File encryptedFile, File targetDir) {
110 try (PDDocument document = Loader.loadPDF(encryptedFile)) {
111 boolean isEncrypted = document.isEncrypted();
112 if (isEncrypted) {
113 document.setAllSecurityToBeRemoved(true);
114 }
115 String targetFilePath = targetDir.getAbsolutePath() + File.separator + encryptedFile.getName();
116 document.save(targetFilePath);
117 LOGGER.info("Remove password success: {}", targetFilePath);
118 } catch (IOException e) {
119 LOGGER.error("Error processing PDF file: {}", e.getMessage());
120 }
121 }
122
123 public static void removePasswordWithIText(File encryptedFile, File targetDir) {
124 PdfReader reader;
125 try {
126 reader = new PdfReader(encryptedFile).setUnethicalReading(true);
127 } catch (IOException e) {
128 LOGGER.error("Error reading PDF file: {}", e.getMessage());
129 return;
130 }
131 String targetFilePath = targetDir.getAbsolutePath() + File.separator + encryptedFile.getName();
132
133 try (PdfDocument pdfDoc = new PdfDocument(reader,
134 new PdfWriter(targetFilePath))) {
135 LOGGER.info("Remove password success: {}", targetFilePath);
136 } catch (IOException e) {
137 LOGGER.error("Error writing PDF file: {}", e.getMessage());
138 }
139 }
140 }